# NOTE(review): `rm(list = ls())` is discouraged in scripts — it does not give a
# truly fresh session (packages/options persist); prefer restarting R instead.
rm(list = ls())
library(data.table)  # fast data tables (not obviously used below — verify)
library(tidyr)       # data reshaping
library(maps)        # map data for plotting
library(haven)       # read SPSS/Stata files
library(ggplot2)     # plotting
library(dplyr)       # data manipulation (rename/select/mutate)
library(readxl)      # read Excel files
library(ggrepel)     # non-overlapping plot labels
library(wordcloud)   # word cloud plots
library(lme4)        # mixed-effects models
library(lmerTest)    # p-values for lmer models
library(reshape2)    # legacy reshaping (melt/dcast)
library(patchwork)   # combining ggplots with `+`
library(psych)       # Cronbach's alpha
# PREP THE DATASET FOR ANALYSIS WVS 5 & 6 ####################
# Read the World Values Survey Wave 5 data (RDS export)
# NOTE(review): absolute, user-specific path — consider a relative path so the
# script runs on other machines.
WV5_data <- readRDS("/Users/laurabazzigher/Documents/GitHub/risk_wvs/data/dataset/WV6_dataset_wave_5_6/F00007944-WV5_Data_R_v20180912.rds")
# Convert the WV5_data object to a plain data.frame
WV5_data_df <- as.data.frame(WV5_data)
# Show only the first rows (the original printed the full ~84k-row data frame,
# contradicting the "show first five" intent)
head(WV5_data_df)
# Rename the variables of interest to descriptive names
WV5_data <- WV5_data_df %>%
  rename(gender = V235, age = V237, country_code = V2, wave = V1,
         risktaking = V86, children = V56, married = V55,
         employed = V241, education = V238)
head(WV5_data)
colnames(WV5_data)
[1] "wave" "V1A" "V1B" "country_code" "V2A" "V3" "V4"
[8] "V4_CO" "V5" "V5_CO" "V6" "V6_CO" "V7" "V7_CO"
[15] "V8" "V8_CO" "V9" "V9_CO" "V10" "V11" "V12"
[22] "V13" "V14" "V15" "V16" "V17" "V18" "V19"
[29] "V20" "V21" "V22" "V23" "V24" "V25" "V26"
[36] "V27" "V28" "V29" "V30" "V31" "V32" "V33"
[43] "V34" "V35" "V36" "V37" "V38" "V39" "V40"
[50] "V41" "V42" "V43" "V43_01" "V43_02" "V43_03" "V43_04"
[57] "V43_05" "V43_06" "V43_07" "V43_08" "V43_09" "V43_10" "V43_11"
[64] "V43_12" "V43_13" "V43_14" "V43_15" "V43_16" "V43_17" "V43_18"
[71] "V43_19" "V43_20" "V43_21" "V43_22" "V43_23" "V43_24" "V43_25"
[78] "V43_26" "V43_27" "V43_28" "V43_29" "V43_30" "V44" "V45"
[85] "V46" "V47" "V48" "V49" "V50" "V51" "V52"
[92] "V53" "V54" "married" "children" "V57" "V58" "V59"
[99] "V60" "V61" "V62" "V63" "V64" "V65" "V66"
[106] "V67" "V68" "V69" "V69_HK" "V70" "V70_HK" "V71"
[113] "V72" "V73" "V73_HK" "V74" "V74_HK" "V75" "V76"
[120] "V77" "V78" "V79" "V80" "V81" "V82" "V83"
[127] "V84" "V85" "risktaking" "V87" "V88" "V89" "V90"
[134] "V91" "V92" "V93" "V94" "V95" "V96" "V97"
[141] "V98" "V99" "V100" "V101" "V102" "V103" "V104"
[148] "V105" "V106" "V107" "V108" "V109" "V110" "V111"
[155] "V112" "V113" "V114" "V115" "V116" "V117" "V118"
[162] "V119" "V120" "V121" "V122" "V123" "V124" "V125"
[169] "V126" "V127" "V128" "V129" "V130" "V130_CA_1" "V130_IQ_1"
[176] "V130_IQ_2" "V130_IQ_3" "V130_IQ_4" "V130_NZ_1" "V130_NZ_2" "V131" "V132"
[183] "V133" "V134" "V135" "V136" "V137" "V138" "V139"
[190] "V140" "V141" "V142" "V143" "V144" "V145" "V146_00"
[197] "V146_01" "V146_02" "V146_03" "V146_04" "V146_05" "V146_06" "V146_07"
[204] "V146_08" "V146_09" "V146_10" "V146_11" "V146_12" "V146_13" "V146_14"
[211] "V146_15" "V146_16" "V146_17" "V146_18" "V146_19" "V146_20" "V146_21"
[218] "V146_22" "V147" "V148" "V149" "V150" "V151" "V151_IQ_A"
[225] "V151_IQ_B" "V152" "V153" "V154" "V155" "V156" "V157"
[232] "V158" "V159" "V160" "V161" "V162" "V163" "V164"
[239] "V165" "V166" "V167" "V168" "V169" "V170" "V171"
[246] "V172" "V173" "V174" "V175" "V176" "V177" "V178"
[253] "V179" "V180" "V181" "V182" "V183" "V184" "V185"
[260] "V186" "V187" "V188" "V189" "V190" "V191" "V192"
[267] "V193" "V194" "V195" "V196" "V197" "V198" "V199"
[274] "V200" "V201" "V202" "V203" "V204" "V205" "V206"
[281] "V207" "V208" "V209" "V210" "V211" "V212" "V213A"
[288] "V213B" "V213C" "V213D" "V213E" "V213F" "V213G" "V213H"
[295] "V213K" "V213L" "V213M" "V213N" "V214" "V215" "V216"
[302] "V217" "V218" "V219" "V220" "V221" "V222" "V223"
[309] "V224" "V225" "V226" "V227" "V228" "V229" "V230"
[316] "V231" "V232" "V233" "V233A" "V234" "gender" "V236"
[323] "age" "education" "V238CS" "V239" "V240" "employed" "V242"
[330] "V242A_CO" "V243" "V244" "V245" "V246" "V247" "V248"
[337] "V249" "V250" "V251" "V252" "V252B" "V253" "V253CS"
[344] "V254" "V255" "V255CS" "V256" "V257" "V257B" "V257C"
[351] "V258" "V259" "V259A" "V260" "V261" "V262" "V263"
[358] "V264" "V265" "S024" "S025" "Y001" "Y002" "Y003"
[365] "SACSECVAL" "SECVALWGT" "RESEMAVAL" "WEIGHTB" "I_AUTHORITY" "I_NATIONALISM" "I_DEVOUT"
[372] "DEFIANCE" "WEIGHT1A" "I_RELIGIMP" "I_RELIGBEL" "I_RELIGPRAC" "DISBELIEF" "WEIGHT2A"
[379] "I_NORM1" "I_NORM2" "I_NORM3" "RELATIVISM" "WEIGHT3A" "I_TRUSTARMY" "I_TRUSTPOLICE"
[386] "I_TRUSTCOURTS" "SCEPTICISM" "WEIGHT4A" "I_INDEP" "I_IMAGIN" "I_NONOBED" "AUTONOMY"
[393] "WEIGHT1B" "I_WOMJOB" "I_WOMPOL" "I_WOMEDU" "EQUALITY" "WEIGHT2B" "I_HOMOLIB"
[400] "I_ABORTLIB" "I_DIVORLIB" "CHOICE" "WEIGHT3B" "I_VOICE1" "I_VOICE2" "I_VOI2_00"
[407] "VOICE" "WEIGHT4B" "S001" "S007" "S018" "S019" "S021"
[414] "COW"
# Keep only the variables needed for the analysis
WV5_data <- WV5_data %>%
  dplyr::select(gender, age, country_code, wave, risktaking,
                children, employed, education, married)
WV5_data
# Country code -> country name lookup table
countrynames <- read.csv(
  "/Users/laurabazzigher/Documents/GitHub/risk_wvs/data/dataset/WV6_dataset_wave_5_6/countrynames.txt",
  header = FALSE, as.is = TRUE
)
colnames(countrynames) <- c("code", "name")
# Map each respondent's numeric country code to its country name
code_positions <- match(WV5_data$country_code, countrynames$code)
WV5_data$country <- countrynames$name[code_positions]
# Number of respondents per country
table(WV5_data$country)
Andorra Argentina Australia Brazil Bulgaria
1003 1002 1421 1500 1001
Burkina Faso Canada Chile China Colombia
1534 2164 1000 1991 3025
Cyprus (G) Egypt Ethiopia Finland France
1050 3051 1500 1014 1001
Georgia Germany Ghana Great Britain Guatemala
1500 2064 1534 1041 1000
Hong Kong Hungary India Indonesia Iran
1252 1007 2001 2015 2667
Iraq Italy Japan Jordan Malaysia
2701 1012 1096 1200 1201
Mali Mexico Moldova Morocco Netherlands
1534 1560 1046 1200 1050
New Zealand Norway Peru Poland Romania
954 1025 1500 1000 1776
Russia Rwanda Serbia and Montenegro Slovenia South Africa
2033 1507 1220 1037 2988
South Korea Spain Sweden Switzerland Taiwan
1200 1200 1003 1241 1227
Thailand Trinidad and Tobago Turkey Ukraine United States
1534 1002 1346 1000 1249
Uruguay Viet Nam Zambia
1000 1495 1500
# Display the updated WV5_data
print(WV5_data)
# Sanity checks for Wave 5 (raw data, before exclusions).
# Pasted console output below is commented out so the script parses.
length(unique(WV5_data$country))
#> [1] 58
nrow(WV5_data) # number of individuals
#> [1] 83975
range(WV5_data$age)  # negative values are WVS missing-data codes, removed later
#> [1] -5 98
table(WV5_data$gender) # sex; -2 is a missing-data code
#> -2     1     2
#> 96 40218 43661
# Read the Wave 6 dataset (.rdata file loads the object WV6_Data_R_v20201117)
load("/Users/laurabazzigher/Documents/GitHub/risk_wvs/data/dataset/WV6_dataset_wave_5_6/WV6_Data_R_v20201117.rdata")
WV6_data <- WV6_Data_R_v20201117
print(WV6_data)
# Rename the variables of interest and keep only those columns
WV6_data <- WV6_data %>%
  rename(wave = V1, gender = V240, age = V242, country_code = V2,
         risktaking = V76, children = V58, married = V57,
         employed = V229, education = V248) %>%
  dplyr::select(gender, age, country_code, wave, risktaking,
                children, employed, education, married)
WV6_data
# Decode the numeric country codes into country names (Wave 6)
countrynames <- read.csv("/Users/laurabazzigher/Documents/GitHub/risk_wvs/data/dataset/WV6_dataset_wave_5_6/countrynames.txt",
                         header = FALSE, as.is = TRUE)
colnames(countrynames) <- c("code", "name")
WV6_data$country <- countrynames$name[match(WV6_data$country_code, countrynames$code)]
table(WV6_data$country)
Algeria Argentina Armenia Australia Azerbaijan Belarus
1200 1030 1100 1477 1002 1535
Brazil Chile China Colombia Cyprus (G) Ecuador
1486 1000 2300 1512 1000 1202
Egypt Estonia Georgia Germany Ghana Haiti
1523 1533 1202 2046 1552 1996
Hong Kong India Iraq Japan Jordan Kazakhstan
1000 4078 1200 2443 1200 1500
Kuwait Kyrgyzstan Lebanon Libya Malaysia Mexico
1303 1500 1200 2131 1300 2000
Morocco Netherlands New Zealand Nigeria Pakistan Palestine
1200 1902 841 1759 1200 1000
Peru Philippines Poland Qatar Romania Russia
1210 1200 966 1060 1503 2500
Rwanda Singapore Slovenia South Africa South Korea Spain
1527 1972 1069 3531 1200 1189
Sweden Taiwan Thailand Trinidad and Tobago Tunisia Turkey
1206 1238 1200 999 1205 1605
Ukraine United States Uruguay Uzbekistan Yemen Zimbabwe
1500 2232 1000 1500 1000 1500
WV6_data
# Sanity checks for Wave 6 (raw data, before exclusions).
# Pasted console output below is commented out so the script parses.
length(unique(WV6_data$country))
#> [1] 60
nrow(WV6_data) # number of individuals
#> [1] 89565
range(WV6_data$age, na.rm = TRUE)
#> [1] 16 102
table(WV6_data$gender) # sex
#> 1     2
#> 42723 46751
# Combine the two waves (Wave 5 + Wave 6); both now have identical columns
WVS_data <- rbind(WV5_data, WV6_data)
WVS_data
length(unique(WVS_data$country))
#> [1] 80
nrow(WVS_data) # number of individuals
#> [1] 173540
range(WVS_data$age, na.rm = TRUE)
#> [1] -5 102
table(WVS_data$gender) # sex; -2 is a missing-data code, removed below
#> -2     1     2
#> 96 82941 90412
# Exclude participants with WVS missing-value codes (negative codes)
WVS_data <- subset(WVS_data,
                   risktaking > 0 & gender > 0 & age > 0 & education > 0 &
                   employed > 0 & married > 0 & children >= 0)
### WVS_data <- na.omit(WVS_data) ### excluded because it is not in code from Mata et al., 2016
# Harmonise the country name used across waves
WVS_data <- WVS_data %>%
  mutate(country = ifelse(country == "Great Britain", "United Kingdom", country))
# Post-exclusion checks (pasted output commented out)
length(unique(WVS_data$country))
#> [1] 77
nrow(WVS_data) # number of individuals
#> [1] 149626
range(WVS_data$age, na.rm = TRUE)
#> [1] 15 99
table(WVS_data$gender) # sex
#> 1     2
#> 71689 77937
# Derive a 3-level education category from the WVS education codes:
# 1-2 = incomplete/no primary, 3-6 = completed school but no university,
# 7-9 = university. (The former `<- NA` initialisation was redundant — the
# column is fully assigned here; unmatched codes become NA anyway.)
WVS_data$education_cat <- ifelse(WVS_data$education %in% c(1, 2), "incomplete or no primary education",
                          ifelse(WVS_data$education %in% c(3, 4, 5, 6), "No Uni",
                          ifelse(WVS_data$education %in% c(7, 8, 9), "Uni", NA)))
# Show the distribution of the new categories (pasted output commented out)
table(WVS_data$education_cat)
#> incomplete or no primary education   No Uni     Uni
#>                              19354    70033   60239
# Recode covariates into binary indicators (coding follows Mata et al., 2016)
WVS_data$gender <- ifelse(WVS_data$gender == 1, 0, 1)      # sex: 0 = male, 1 = female
WVS_data$children <- ifelse(WVS_data$children == 0, 0, 1)  # children: no vs. yes
WVS_data$married <- ifelse(WVS_data$married == 1, 1, 0)    # married: yes vs. no
WVS_data$employed <- ifelse(WVS_data$employed < 4, 1, 0)   # employed: yes vs. no
# NOTE(review): the `< 4` cut-off labels codes 1-3 as 0, but education_cat
# above treats only codes 1-2 as "incomplete or no primary" — confirm the
# intended boundary against the WVS codebook.
WVS_data$education <- ifelse(WVS_data$education < 4, 0, 1) # education: no primary vs. primary+
head(WVS_data)
# Post-recoding checks (pasted output commented out)
length(unique(WVS_data$country))
#> [1] 77
nrow(WVS_data)
#> [1] 149626
range(WVS_data$age)
#> [1] 15 99
table(WVS_data$gender)
#> 0     1
#> 71689 77937
table(WVS_data$children)
#> 0      1
#> 44434 105192
table(WVS_data$married)
#> 0     1
#> 66354 83272
table(WVS_data$employed)
#> 0     1
#> 69519 80107
table(WVS_data$education)
#> 0      1
#> 38011 111615
# PREP THE DATASET FOR ANALYSIS HARDSHIP ####################
head(WVS_data)
# Country-level hardship indicators (selection used by Mata et al., 2016)
csv_path <- "/Users/laurabazzigher/Documents/GitHub/risk_wvs/data/Data_Mata_et_al._2016/countryfacts_selection_new.csv"
countryfacts <- read.csv(csv_path, as.is = TRUE, header = TRUE)
labels <- c("code", "country", "codeWVS", "Homicide", "GDP", "InfMort", "LifeExp", "GINI", "GenderPEdu")
names(countryfacts) <- labels
# Proportion of missing values for each hardship indicator.
# nrow() replaces the hard-coded country count (77), so this stays correct
# if the countryfacts file changes.
missings <- colSums(is.na(countryfacts[, 4:9])) / nrow(countryfacts)
# Check that all countries in the WVS subset are included in the countryfacts file
unique(WVS_data$country_code) %in% countryfacts$codeWVS
#> [1] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
#> [26] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
#> [51] TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE TRUE
#> [76] TRUE TRUE
countryfacts
# Print the unique values of every column in countryfacts
for (column_name in names(countryfacts)) {
  cat("Unique values for column", column_name, ":", "\n")
  for (value in unique(countryfacts[[column_name]])) {
    cat(value, "\n")
  }
  cat("\n")
}
Unique values for column code :
DZA
ADO
AZE
ARG
AUS
BHR
ARM
BRA
BGR
BLR
CAN
CHL
CHN
TW
COL
CYP
ECU
ETH
EST
FIN
FRA
GEO
PS
DEU
GHA
HUN
IND
IDN
IRN
GI
JPN
KAZ
JOR
KOR
KWT
KGZ
LBN
LBY
MYS
MLI
MEX
MDA
MAR
NLD
NZL
NGA
NOR
PAK
PER
PHL
POL
QAT
ROM
RUS
RWA
SGP
VNM
SVN
ZAF
ZWE
ESP
SWE
CHE
THA
TTO
TUN
TUR
UKR
EGY
UK
USA
BFA
URY
UZB
YEM
SRB
ZMB
HTI
Unique values for column country :
Algeria
Andorra
Azerbaijan
Argentina
Australia
Bahrain
Armenia
Brazil
Bulgaria
Belarus
Canada
Chile
China
Taiwan
Colombia
Cyprus
Ecuador
Ethiopia
Estonia
Finland
France
Georgia
Palestine
Germany
Ghana
Hungary
India
Indonesia
Iran
Gibraltar
Japan
Kazakhstan
Jordan
South Korea
Kuwait
Kyrgyzstan
Lebanon
Libya
Malaysia
Mali
Mexico
Moldova
Morocco
Netherlands
New Zealand
Nigeria
Norway
Pakistan
Peru
Philippines
Poland
Qatar
Romania
Russia
Rwanda
Singapore
Vietnam
Slovenia
South Africa
Zimbabwe
Spain
Sweden
Switzerland
Thailand
Trinidad
Tunisia
Turkey
Ukraine
Egypt
Great Britain
USA
Burkina Faso
Uruguay
Uzbekistan
Yemen
Serbia
Zambia
Haiti
Unique values for column codeWVS :
12
20
31
32
36
48
51
76
100
112
124
152
156
158
170
196
218
231
233
246
250
268
275
276
288
348
356
360
364
368
392
398
400
410
414
417
422
434
458
466
484
498
504
528
554
566
578
586
604
608
616
634
642
643
646
702
704
705
710
716
724
752
756
764
780
788
792
804
818
826
840
854
858
860
887
688
894
332
Unique values for column Homicide :
4.4
0.8
2.4
6
1.1
2.1
32.4
1.9
6.2
1.8
4.6
NA
43.9
2
13.8
8
5.4
1.4
1
4.8
10
1.5
4.3
4.7
0.4
9.2
2.9
3.1
9.1
2.6
11
22
7.5
2.5
0.9
1.2
10.1
0.6
8.9
12.4
7.1
13.1
5.8
4
0.7
35.7
15.1
5.5
35.3
2.7
5.2
5.1
9.8
7.9
3.2
1.6
10.5
20.98444
Unique values for column GDP :
7500
37200
10800
18600
43000
51400
6300
12100
14400
16100
43100
19100
9800
39600
11100
24500
10600
1300
22400
35900
35700
6100
NA
39500
3500
19800
4000
5200
12800
37100
14100
33200
42100
2500
15800
11300
17500
1100
15600
3800
5500
43300
30400
2800
55400
3100
4700
21100
102100
18100
1500
62400
27400
11500
600
30100
40900
54800
9900
20300
15300
7400
6600
37300
52800
16600
1800
2900
Unique values for column InfMort :
21.76
3.69
26.67
9.96
4.43
9.68
13.97
19.21
15.08
3.64
4.71
7.02
14.79
4.49
15.02
8.54
17.93
55.77
6.7
3.36
3.31
16.68
NA
3.46
38.52
5.09
43.19
25.16
39
6.29
2.13
21.61
15.73
3.93
7.51
28.71
7.98
11.87
13.69
104.34
12.58
12.93
24.52
3.66
4.59
74.09
2.48
57.48
20.21
17.64
6.19
6.42
10.16
7.08
59.59
2.53
18.99
4.04
41.61
26.55
3.33
2.6
3.73
9.86
24.82
23.19
21.43
8.1
22.41
4.4
6.17
76.8
8.97
19.84
50.41
6.16
66.62
41.1
Unique values for column LifeExp :
76.39
82.65
71.91
77.51
82.07
78.58
74.12
73.28
74.33
72.15
81.67
78.44
75.15
79.84
75.25
78.34
76.36
60.75
74.07
79.69
81.66
75.72
NA
80.44
65.75
75.46
67.8
72.17
70.89
79.13
84.46
70.24
74.1
79.8
77.64
70.06
77.22
76.04
74.52
54.95
75.43
70.12
76.51
81.12
80.93
52.62
81.6
67.05
73.23
72.48
76.65
78.38
74.69
70.16
59.26
84.38
72.91
77.83
49.56
55.68
81.47
81.89
82.39
74.18
72.29
75.68
73.29
69.14
73.45
80.42
79.56
54.78
76.81
64.83
75.02
51.83
38.78
Unique values for column GINI :
35.3
NA
33.7
45.8
30.3
30.9
51.9
45.3
27.2
32.1
52.1
47.3
34.2
53.5
32.4
48.5
33
31.3
26.8
46
27
39.4
24.7
36.8
44.5
37.6
28.9
39.7
31.1
33.4
46.2
40.1
48.3
40.9
25.1
36.2
43.7
25
29.6
44.8
34.1
27.4
42
46.8
46.3
23.7
63.1
50.1
34
23
28.7
40
40.2
28.2
30.8
32.3
45
39.5
37.7
38
57.5
66.3
Unique values for column GenderPEdu :
94.426
NA
97.948
98.743
99.513
103.237
114.144
94.182
99.191
100.243
100.678
96.873
99.907
98.117
99.77
100.307
86.105
100.649
99.471
100.044
101.032
99.574
99.883
98.939
102.298
100.046
101.797
99.83
101.458
98.242
98.98
98.716
98.175
92.021
96.044
93.895
88.371
100.231
100.068
95.185
99.051
100.474
91.588
100.45
87.071
99.122
97.843
100.376
90.42
98.532
100.808
102.225
97.829
100.203
94.856
98.614
99.35
99.61
100
95.254
96.603
97.167
98.551
101.884
96.003
98.38
96.558
96.746
97.046
84.034
100.093
99.196
57.37
# Plot histograms of all raw hardship indicators side by side (patchwork `+`)
combined_plot <- NULL
# Hardship indicator columns to plot
items <- c("Homicide", "GDP", "InfMort", "LifeExp", "GINI", "GenderPEdu")
for (item in items) {
  # aes(.data[[item]]) replaces the deprecated aes_string()
  # NOTE(review): binwidth = 1 is very narrow for GDP (values in the
  # thousands) — confirm the intended bin width per indicator.
  plot <- ggplot(countryfacts, aes(x = .data[[item]])) +
    geom_histogram(binwidth = 1, fill = "skyblue", color = "black") +
    labs(title = item,
         x = item,
         y = "Frequency") +
    theme_minimal()
  # Append the histogram to the combined patchwork plot
  if (is.null(combined_plot)) {
    combined_plot <- plot
  } else {
    combined_plot <- combined_plot + plot
  }
}
# Display the combined plot
combined_plot
# Log-transform the skewed indicators (GINI deliberately left untransformed)
countryfacts$Homicide <- log(countryfacts$Homicide)
countryfacts$GDP <- log(countryfacts$GDP)
countryfacts$InfMort <- log(countryfacts$InfMort)
countryfacts$LifeExp <- log(countryfacts$LifeExp)
#countryfacts$GINI=log(countryfacts$GINI) # not transformed
countryfacts$GenderPEdu <- log(countryfacts$GenderPEdu)
countryfacts
# Standardise each indicator; GDP, LifeExp and GenderPEdu are negated
# (reverse-coded) so that higher values always mean more hardship.
# as.numeric() drops the 1-column matrix that scale() returns, keeping the
# data-frame columns as plain numeric vectors.
countryfacts$Homicide <- as.numeric(scale(countryfacts$Homicide))
countryfacts$GDP <- as.numeric(scale(-countryfacts$GDP))
countryfacts$InfMort <- as.numeric(scale(countryfacts$InfMort))
countryfacts$LifeExp <- as.numeric(scale(-countryfacts$LifeExp))
countryfacts$GINI <- as.numeric(scale(countryfacts$GINI))
countryfacts$GenderPEdu <- as.numeric(scale(-countryfacts$GenderPEdu))
countryfacts
# Hardship index = per-country mean of the six standardised indicators
countryfacts$hardship <- rowMeans(countryfacts[, c("Homicide", "GDP", "GINI", "LifeExp", "InfMort", "GenderPEdu")], na.rm = TRUE)
countryfacts
# Plot histograms of all indicators after log transform / standardisation,
# plus the combined hardship index
combined_plot <- NULL
# Indicator columns to plot
items <- c("Homicide", "GDP", "GINI", "LifeExp", "InfMort", "GenderPEdu", "hardship")
for (item in items) {
  # aes(.data[[item]]) replaces the deprecated aes_string()
  plot <- ggplot(countryfacts, aes(x = .data[[item]])) +
    geom_histogram(binwidth = 1, fill = "skyblue", color = "black") +
    labs(title = item,
         x = item,
         y = "Frequency") +
    theme_minimal()
  # Append the histogram to the combined patchwork plot
  if (is.null(combined_plot)) {
    combined_plot <- plot
  } else {
    combined_plot <- combined_plot + plot
  }
}
# Display the combined plot
combined_plot
# Panel function for pairs(): writes the Pearson r and its p-value into
# each upper panel. `cex.cor` and `...` are accepted for pairs() compatibility.
panel.cor <- function(x, y, digits = 2, cex.cor, ...) {
  usr <- par("usr")
  on.exit(par(usr))  # restore user coordinates when the panel is done
  par(usr = c(0, 1, 0, 1))
  # Correlation coefficient on pairwise-complete observations
  r <- cor(x, y, use = "complete.obs")
  txt <- format(c(r, 0.123456789), digits = digits)[1]
  txt <- paste("r= ", txt, sep = "")
  text(0.5, 0.6, txt)
  # p-value. Note: cor.test() has no `use` argument — the former
  # use = "complete.obs" was silently swallowed by `...`; cor.test drops
  # incomplete pairs itself.
  p <- cor.test(x, y)$p.value
  txt2 <- format(c(p, 0.123456789), digits = digits)[1]
  txt2 <- paste("p= ", txt2, sep = "")
  if (p < 0.01) txt2 <- paste("p ", "<0.01", sep = "")
  text(0.5, 0.4, txt2)
}
# Scatterplot matrix of the indicator columns (4-10) with r/p in the upper panels
pairs(countryfacts[, 4:10], upper.panel = panel.cor, las = 1, cex.labels = 0.9)
# Export the current device to an EPS file
dev.print(postscript, "scatter_indicators.eps",
          width = 8, height = 8, horizontal = FALSE, onefile = FALSE)
#> quartz_off_screen
#> 2
# psych is already attached at the top of the script; kept for section independence
library(psych)
# Subset of the hardship indicators from countryfacts
hardship_subset <- countryfacts[, c("Homicide", "GDP", "InfMort", "LifeExp", "GINI", "GenderPEdu")]
# Cronbach's alpha (internal consistency of the six indicators);
# explicit namespace guards against `alpha` being masked by another package
alpha_result <- psych::alpha(hardship_subset)
#> Number of categories should be increased in order to count frequencies.
alpha_result
#> Reliability analysis
#> Call: alpha(x = hardship_subset)
#> 95% confidence boundaries
#> Reliability if an item is dropped:
#> Item statistics
# Harmonise the country name with WVS_data before merging
countryfacts$country[countryfacts$country == "USA"] <- "United States"
# Merge individual-level WVS data with the country-level hardship indicators.
# Both inputs carry a "country" column, so merge() suffixes them as
# country.x (from WVS_data) and country.y (from countryfacts).
data <- merge(WVS_data, countryfacts[, c("codeWVS", "country", "Homicide", "GDP", "InfMort", "LifeExp", "GINI", "GenderPEdu", "hardship")],
              by.x = "country_code", by.y = "codeWVS", all.x = TRUE)
# Remove the redundant column 'country_code'
data <- data[, -which(names(data) == "country_code")]
# Set USA as the reference country.
# BUG FIX: after the merge the column is "country.x", not "country", so the
# old check on data$country was always FALSE and the releveling never ran —
# the pasted warning below confirms the old branch fired. setdiff() also
# replaces the former mis-sized unique()/mask subsetting.
if ("United States" %in% data$country.x) {
  other_countries <- setdiff(unique(data$country.x), "United States")
  data$country.x <- factor(data$country.x, levels = c("United States", other_countries))
} else {
  # If "United States" is not found, keep the country variable as is
  print("Warning: 'United States' not found in the country variable.")
}
#> [1] "Warning: 'United States' not found in the country variable."  (old behavior)
# Display the updated data matrix
print(data)
# Reverse-code the risktaking item so higher values = more risk seeking
# (6 - x + 1 simplifies to 7 - x; assumes a 1-6 scale — confirm against the
# WVS codebook)
data$risktaking <- 7 - data$risktaking
# Bin risktaking into ordered categories
interval <- cut(data$risktaking, breaks = c(-Inf, 1, 3, 5, Inf),
                labels = c("Very Low", "Low", "Medium", "High"),
                include.lowest = TRUE)
# Add the ordinal variable "Risktaking_ordinal" to the data frame
data$Risktaking_ordinal <- as.factor(interval)
# Display the updated data matrix
print(data)
# T-score transformation (mean 50, SD 10); as.numeric() drops the matrix
# attributes that scale() attaches
data$T_score_risktaking <- as.numeric(10 * scale(data$risktaking, center = TRUE, scale = TRUE) + 50)
data
# Transform risk variable into Z score
# Assuming T-scores have a mean of 50 and a standard deviation of 10
#WVS_data$Z_score_risktaking = (WVS_data$T_score_risktaking - 50) / 10
# Print the resulting data frame
print(data)
# Standardise age within each country; ungroup() so the grouping does not
# leak into later operations on `data`
data <- data %>%
  group_by(country.x) %>%
  mutate(z_score_age = as.numeric(scale(age))) %>%
  ungroup()
data
# Check Laura on Items #########
# Sanity checks on the merged dataset (pasted output commented out)
length(unique(data$country.x))
#> [1] 77
nrow(data)
#> [1] 149626
range(data$age, na.rm = TRUE)
#> [1] 15 99
table(data$gender)
#> 0     1
#> 71689 77937
table(data$children)
#> 0      1
#> 44434 105192
table(data$married)
#> 0     1
#> 66354 83272
table(data$employed)
#> 0     1
#> 69519 80107
table(data$education)
#> 0      1
#> 38011 111615
# PREP THE DATASET FOR ANALYSIS MIXED-MODELS ####################
#Add Hardship to WVS_data
head(WVS_data)
# MIXED-MODELS #################### #################### ####################
# MIXED-MODELS WVS-DATA ####################
# Null model: random intercept per country only (baseline for model comparison).
model0 <- lmer(risktaking ~ 1 + (1 | country.x), data = data)
summary_model0 <- summary(model0)
# Model 1: fixed effects of age (z-scored) and gender, each with a
# country-level random slope in addition to the random intercept.
model1 <- lmer(
  risktaking ~ 1 + scale(z_score_age) + factor(gender) +
    (1 + scale(z_score_age) + factor(gender) | country.x),
  data = data,
  control = lmerControl(optimizer = "bobyqa")
)
summary_model1 <- summary(model1)
# Show the coefficient table of the model
print(summary_model1)
Linear mixed model fit by REML. t-tests use Satterthwaite's method ['lmerModLmerTest']
Formula: risktaking ~ 1 + scale(z_score_age) + factor(gender) + (1 + scale(z_score_age) + factor(gender) | country.x)
Data: data
Control: lmerControl(optimizer = "bobyqa")
REML criterion at convergence: 539948.2
Scaled residuals:
Min 1Q Median 3Q Max
-2.4670 -0.7808 -0.0780 0.7529 3.2231
Random effects:
Groups Name Variance Std.Dev. Corr
country.x (Intercept) 0.19347 0.4399
scale(z_score_age) 0.02074 0.1440 0.39
factor(gender)1 0.02200 0.1483 0.19 0.31
Residual 2.15080 1.4666
Number of obs: 149626, groups: country.x, 77
Fixed effects:
Estimate Std. Error df t value Pr(>|t|)
(Intercept) 3.40977 0.05049 76.25592 67.54 <2e-16 ***
scale(z_score_age) -0.30749 0.01694 75.86604 -18.15 <2e-16 ***
factor(gender)1 -0.36821 0.01884 73.37044 -19.54 <2e-16 ***
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Correlation of Fixed Effects:
(Intr) sc(__)
scl(z_scr_) 0.377
fctr(gndr)1 0.130 0.276
# Summarize model 1 and pull its fixed-effect rows into a tidy table.
summary_model1 <- summary(model1)
fix1 <- summary_model1$coefficients
rows1 <- c("(Intercept)", "scale(z_score_age)", "factor(gender)1")
results_model1 <- data.frame(
  Predictor = c("Intercept", "Age", "Gender"),
  Estimate  = fix1[rows1, "Estimate"],
  SE        = fix1[rows1, "Std. Error"],
  T_score   = fix1[rows1, "t value"],
  p_value   = fix1[rows1, "Pr(>|t|)"],
  row.names = NULL
)
# Report p-values APA style: exact to three decimals, "< .001" below that.
results_model1$p_value <- ifelse(results_model1$p_value < 0.001,
                                 "< .001",
                                 sprintf("%.3f", results_model1$p_value))
# Display the results
print(results_model1)
# Model 2: add parental, marital, occupational and education status to model 1,
# each entering both as a fixed effect and as a country-level random slope.
model2 <- lmer(
  risktaking ~ 1 + scale(z_score_age) + factor(gender) + factor(children) +
    factor(married) + factor(employed) + factor(education) +
    (1 + scale(z_score_age) + factor(gender) + factor(children) +
       factor(married) + factor(employed) + factor(education) | country.x),
  data = data,
  control = lmerControl(optCtrl = list(maxfun = 30000), optimizer = "bobyqa")
)
summary_model2 <- summary(model2)
# Show the coefficient table of the model
print(summary_model2)
Linear mixed model fit by REML. t-tests use Satterthwaite's method ['lmerModLmerTest']
Formula: risktaking ~ 1 + scale(z_score_age) + factor(gender) + factor(children) +
factor(married) + factor(employed) + factor(education) + (1 + scale(z_score_age) + factor(gender) + factor(children) +
factor(married) + factor(employed) + factor(education) | country.x)
Data: data
Control: lmerControl(optCtrl = list(maxfun = 30000), optimizer = "bobyqa")
REML criterion at convergence: 538477.5
Scaled residuals:
Min 1Q Median 3Q Max
-2.57012 -0.78107 -0.08456 0.74191 3.14746
Random effects:
Groups Name Variance Std.Dev. Corr
country.x (Intercept) 0.155323 0.39411
scale(z_score_age) 0.014315 0.11964 0.33
factor(gender)1 0.023376 0.15289 0.08 0.31
factor(children)1 0.021160 0.14546 0.09 0.19 0.08
factor(married)1 0.010314 0.10156 0.27 0.49 0.54 0.25
factor(employed)1 0.007374 0.08587 -0.04 0.02 0.02 -0.28 -0.23
factor(education)1 0.015087 0.12283 -0.16 0.05 0.10 -0.15 0.08 0.19
Residual 2.125948 1.45806
Number of obs: 149626, groups: country.x, 77
Fixed effects:
Estimate Std. Error df t value Pr(>|t|)
(Intercept) 3.51338 0.04720 77.54445 74.441 < 2e-16 ***
scale(z_score_age) -0.22513 0.01455 75.52583 -15.469 < 2e-16 ***
factor(gender)1 -0.34471 0.01947 73.55877 -17.706 < 2e-16 ***
factor(children)1 -0.20994 0.02071 72.09426 -10.139 1.62e-15 ***
factor(married)1 -0.13513 0.01558 62.50011 -8.675 2.53e-12 ***
factor(employed)1 0.01777 0.01333 68.20440 1.334 0.187
factor(education)1 0.12542 0.01870 51.20044 6.708 1.55e-08 ***
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Correlation of Fixed Effects:
(Intr) sc(__) fctr(g)1 fctr(c)1 fctr(mr)1 fctr(mp)1
scl(z_scr_) 0.291
fctr(gndr)1 0.024 0.277
fctr(chld)1 0.010 0.057 0.013
fctr(mrrd)1 0.172 0.335 0.383 -0.050
fctr(mply)1 -0.079 0.052 0.084 -0.207 -0.157
fctr(dctn)1 -0.256 0.087 0.078 -0.080 0.036 0.063
# Summarize model 2 and pull its fixed-effect rows into a tidy table.
summary_model2 <- summary(model2)
fix2 <- summary_model2$coefficients
rows2 <- c("(Intercept)", "scale(z_score_age)", "factor(gender)1",
           "factor(children)1", "factor(married)1", "factor(employed)1",
           "factor(education)1")
results_model2 <- data.frame(
  Predictor = c("Intercept", "Age", "Gender", "Parental status",
                "Marital status", "Occupational status", "Education"),
  Estimate  = fix2[rows2, "Estimate"],
  SE        = fix2[rows2, "Std. Error"],
  T_score   = fix2[rows2, "t value"],
  p_value   = fix2[rows2, "Pr(>|t|)"],
  row.names = NULL
)
# Report p-values APA style: exact to three decimals, "< .001" below that.
results_model2$p_value <- ifelse(results_model2$p_value < 0.001,
                                 "< .001",
                                 sprintf("%.3f", results_model2$p_value))
# Display the results
print(results_model2)
# Model 3: let hardship moderate the age and gender effects. Fitted with ML
# (REML = FALSE) so the deviance is comparable across fixed-effect structures.
model3 <- lmer(
  risktaking ~ 1 + scale(z_score_age) * hardship + factor(gender) * hardship +
    factor(children) + factor(married) + factor(employed) + factor(education) +
    (1 + scale(z_score_age) + factor(gender) + factor(children) +
       factor(married) + factor(employed) + factor(education) | country.x),
  data = data,
  control = lmerControl(optCtrl = list(maxfun = 30000), optimizer = "bobyqa"),
  REML = FALSE
)
summary_model3 <- summary(model3)
# Show the coefficient table of the model
print(summary_model3)
Linear mixed model fit by maximum likelihood . t-tests use Satterthwaite's method ['lmerModLmerTest']
Formula: risktaking ~ 1 + scale(z_score_age) * hardship + factor(gender) *
hardship + factor(children) + factor(married) + factor(employed) +
factor(education) + (1 + scale(z_score_age) + factor(gender) +
factor(children) + factor(married) + factor(employed) + factor(education) | country.x)
Data: data
Control: lmerControl(optCtrl = list(maxfun = 30000), optimizer = "bobyqa")
AIC BIC logLik deviance df.resid
534950.2 535336.6 -267436.1 534872.2 148619
Scaled residuals:
Min 1Q Median 3Q Max
-2.56338 -0.78122 -0.08385 0.74212 3.15065
Random effects:
Groups Name Variance Std.Dev. Corr
country.x (Intercept) 0.15017 0.38752
scale(z_score_age) 0.01154 0.10742 0.28
factor(gender)1 0.01854 0.13617 0.01 0.14
factor(children)1 0.02073 0.14398 0.06 0.10 -0.02
factor(married)1 0.01027 0.10136 0.19 0.30 0.35 0.26
factor(employed)1 0.00723 0.08503 -0.01 0.09 0.08 -0.30 -0.25
factor(education)1 0.01484 0.12181 -0.18 0.03 0.08 -0.17 0.08 0.18
Residual 2.12501 1.45774
Number of obs: 148658, groups: country.x, 76
Fixed effects:
Estimate Std. Error df t value Pr(>|t|)
(Intercept) 3.51401 0.04677 77.02604 75.132 < 2e-16 ***
scale(z_score_age) -0.22398 0.01334 74.68321 -16.795 < 2e-16 ***
hardship 0.07198 0.05677 75.01318 1.268 0.20880
factor(gender)1 -0.34334 0.01787 68.94003 -19.212 < 2e-16 ***
factor(children)1 -0.21088 0.02066 72.11331 -10.206 1.22e-15 ***
factor(married)1 -0.13578 0.01566 62.04300 -8.671 2.72e-12 ***
factor(employed)1 0.01587 0.01332 69.11071 1.191 0.23756
factor(education)1 0.12400 0.01870 51.28115 6.630 2.04e-08 ***
scale(z_score_age):hardship 0.05477 0.01647 74.79099 3.327 0.00136 **
hardship:factor(gender)1 0.06775 0.02172 70.14037 3.120 0.00263 **
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
Correlation of Fixed Effects:
(Intr) sc(__) hrdshp fctr(g)1 fctr(c)1 fctr(mr)1 fctr(mp)1 fctr(d)1 s(__):
scl(z_scr_) 0.249
hardship 0.004 0.005
fctr(gndr)1 -0.045 0.131 0.002
fctr(chld)1 -0.017 -0.023 0.005 -0.058
fctr(mrrd)1 0.115 0.204 -0.002 0.254 -0.040
fctr(mply)1 -0.059 0.106 0.003 0.124 -0.219 -0.168
fctr(dctn)1 -0.266 0.074 0.009 0.059 -0.088 0.035 0.054
scl(z_sc_): 0.007 0.002 0.269 0.000 0.011 -0.005 -0.031 -0.009
hrdshp:f()1 -0.001 0.008 -0.059 0.008 -0.005 -0.010 0.017 0.003 0.066
# Summarize model 3 and pull its fixed-effect rows into a tidy table.
summary_model3 <- summary(model3)
fix3 <- summary_model3$coefficients
# NOTE(fix): the original listed only 9 predictors although model 3 estimates
# 10 fixed effects (the age-by-hardship interaction was omitted), and it
# indexed each cell one by one, which fails with an opaque "subscript out of
# bounds" error when any row name is wrong (see the transcript error below).
# Subsetting with one vector of row names keeps labels and coefficients aligned.
rows3 <- c("(Intercept)", "scale(z_score_age)", "factor(gender)1",
           "factor(children)1", "factor(married)1", "factor(employed)1",
           "factor(education)1", "hardship",
           "scale(z_score_age):hardship", "hardship:factor(gender)1")
# Fail loudly and clearly if the model's coefficient names ever change.
stopifnot(rows3 %in% rownames(fix3))
results_model3 <- data.frame(
  Predictor = c("Intercept", "Age", "Gender", "Parental status",
                "Marital status", "Occupational status", "Education",
                "Hardship", "Interaction: Age * Hardship",
                "Interaction: Gender * Hardship"),
  Estimate  = fix3[rows3, "Estimate"],
  SE        = fix3[rows3, "Std. Error"],
  T_score   = fix3[rows3, "t value"],
  p_value   = fix3[rows3, "Pr(>|t|)"],
  row.names = NULL
)
# Report p-values APA style, consistent with the tables for models 1 and 2.
results_model3$p_value <- ifelse(results_model3$p_value < 0.001,
                                 "< .001",
                                 sprintf("%.3f", results_model3$p_value))
# Display the results
print(results_model3)
Error in summary_model3$coefficients["scale(z_score_age)", "Estimate"] :
subscript out of bounds
anova(model0,model1)
refitting model(s) with ML (instead of REML)
Data: data
Models:
model0: risktaking ~ 1 + (1 | country.x)
model1: risktaking ~ 1 + scale(z_score_age) + factor(gender) + (1 + scale(z_score_age) + factor(gender) | country.x)
npar AIC BIC logLik deviance Chisq Df Pr(>Chisq)
model0 3 545328 545357 -272661 545322
model1 10 536124 536223 -268052 536104 9217.4 7 < 2.2e-16 ***
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
anova(model1,model2)
refitting model(s) with ML (instead of REML)
Data: data
Models:
model1: risktaking ~ 1 + scale(z_score_age) + factor(gender) + (1 + scale(z_score_age) + factor(gender) | country.x)
model2: risktaking ~ 1 + scale(z_score_age) + factor(gender) + factor(children) + factor(married) + factor(employed) + factor(education) + (1 + scale(z_score_age) + factor(gender) + factor(children) + factor(married) + factor(employed) + factor(education) | country.x)
npar AIC BIC logLik deviance Chisq Df Pr(>Chisq)
model1 10 536124 536223 -268052 536104
model2 36 534730 535087 -267329 534658 1446 26 < 2.2e-16 ***
---
Signif. codes: 0 ‘***’ 0.001 ‘**’ 0.01 ‘*’ 0.05 ‘.’ 0.1 ‘ ’ 1
# NOTE(fix): anova(model2, model3) errored ("not all fitted to the same size of
# dataset") because model 3 drops rows with missing hardship (148658 obs vs
# 149626). Refit model 2 on the hardship-complete subset first; anova() then
# refits with ML automatically so the likelihood-ratio test is valid.
model2_h <- update(model2, data = data[!is.na(data$hardship), ])
anova(model2_h, model3)
Error in anova.merMod(model2, model3) :
models were not all fitted to the same size of dataset
# Stack the fixed-effect tables of models 1-3 into one matrix for export.
# Model 3's rows are reordered so 'hardship' follows the demographic terms,
# matching the row order used for the other models.
model3_row_order <- c(1:2, 4:8, 3, 9:10)
coefsallmodels <- rbind(
  summary_model1$coefficients,
  summary_model2$coefficients,
  summary_model3$coefficients[model3_row_order, ]
)
# Write the combined table into the working directory and echo its full path.
write.csv(coefsallmodels, "coefsallmodels.csv")
file_path <- file.path(getwd(), "coefsallmodels.csv")
file_path
# Extract the coefficient table of each model
coefficients_model0 <- summary(model0)$coefficients
coefficients_model1 <- summary(model1)$coefficients
coefficients_model2 <- summary(model2)$coefficients
coefficients_model3 <- summary(model3)$coefficients
# NOTE(fix): lmer labels dummy-coded terms with the factor level appended
# (e.g. "factor(gender)1", "hardship:factor(gender)1" — see the printed model
# summaries above). The original filters matched bare names like
# "factor(gender)" and therefore silently dropped every dummy-coded row,
# keeping only the intercept and age terms. drop = FALSE keeps model0's
# single (Intercept) row a matrix so rbind() preserves its row name.
keep_base <- c("(Intercept)", "scale(z_score_age)", "factor(gender)1")
keep_covariates <- c("factor(children)1", "factor(married)1",
                     "factor(employed)1", "factor(education)1")
keep_hardship <- c("hardship", "scale(z_score_age):hardship",
                   "hardship:factor(gender)1")
coefficients_model0 <- coefficients_model0[rownames(coefficients_model0) %in% keep_base, , drop = FALSE]
coefficients_model1 <- coefficients_model1[rownames(coefficients_model1) %in% keep_base, , drop = FALSE]
coefficients_model2 <- coefficients_model2[rownames(coefficients_model2) %in% c(keep_base, keep_covariates), , drop = FALSE]
coefficients_model3 <- coefficients_model3[rownames(coefficients_model3) %in% c(keep_base, keep_covariates, keep_hardship), , drop = FALSE]
# Combine the retained coefficient rows of all models
coefs_all_models <- rbind(coefficients_model0, coefficients_model1,
                          coefficients_model2, coefficients_model3)
# Build a readable results table from the combined coefficients
results_table <- data.frame(
  Predictor = rownames(coefs_all_models),
  b = coefs_all_models[, "Estimate"],
  SE = coefs_all_models[, "Std. Error"],
  T_score = coefs_all_models[, "t value"],
  p_value = coefs_all_models[, "Pr(>|t|)"]
)
# Print the results table
results_table